Loading library
Reading input
1. Distribution of Mosquito by Species
# Map every 2004 and 2013 occurrence record, drawing one trace per species.
mosquito_data %>%
  filter(YEAR %in% c("2004", "2013")) %>%
  select(VECTOR, COUNTRY, X, Y) %>%
  plot_mapbox(
    lon = ~X, lat = ~Y,
    # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
    mode = "markers",
    # `hoverinfo` only accepts plotly flags, so the country name is supplied
    # through `text` and selected with the "text" flag.
    text = ~COUNTRY, hoverinfo = "text",
    split = ~VECTOR, width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
Analysis: Brazil has the highest density of ‘Aedes aegypti’; similarly, Taiwan has the highest density of ‘Aedes albopictus’. The first perception problem here is that the number of data points is read as being directly associated with the population, which is not the case.
# Build a bubble map of the occurrence records of one species in one year.
#
# data:        occurrence records with YEAR, VECTOR, COUNTRY, X and Y columns.
# year_value:  year to keep (compared against YEAR).
# vector_name: species to keep (compared against VECTOR).
#
# Returns a plotly mapbox object with one marker per distinct location, sized
# by the number of records at that location.
plot_vector_year <- function(data, year_value, vector_name) {
  data %>%
    filter(YEAR == year_value & VECTOR == vector_name) %>%
    group_by(COUNTRY, X, Y) %>%
    summarise(count = n()) %>%
    ungroup() %>%
    plot_mapbox(
      lon = ~X, lat = ~Y,
      # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
      mode = "markers",
      size = ~count,
      # `hoverinfo` only accepts plotly flags, and VECTOR is no longer a
      # column after summarise(); the hover label is passed through `text`.
      text = ~paste0(vector_name, "<br>", COUNTRY, ": ", count, " records"),
      hoverinfo = "text"
    ) %>%
    layout(
      title = paste0(
        vector_name, " Mosquito Population vs. Country in ", year_value
      )
    )
}

p1 <- plot_vector_year(mosquito_data, "2004", "Aedes aegypti")
p2 <- plot_vector_year(mosquito_data, "2013", "Aedes aegypti")
p3 <- plot_vector_year(mosquito_data, "2004", "Aedes albopictus")
p4 <- plot_vector_year(mosquito_data, "2013", "Aedes albopictus")
# Print the four plot objects p1..p4 one after another; the `## Warning`
# lines are the console output recorded after each print.
p1
## Warning: `line.width` does not currently support multiple values.
p2
## Warning: `line.width` does not currently support multiple values.
p3
## Warning: `line.width` does not currently support multiple values.
p4
## Warning: `line.width` does not currently support multiple values.
Analysis: Going from 2004 to 2013, the population of ‘Aedes aegypti’ increased worldwide, especially in Brazil. However, in the case of ‘Aedes albopictus’ the population decreased. Again, this could be an effect of the number of data points alone.
2. Geoplot of mosquitoes per country
# World choropleth of the number of occurrence records per country.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~count, text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # plotly projection names are lower-case; a capitalised name is not a
    # recognised value.
    geo = list(projection = list(type = "equirectangular"))
  )
Analysis: Because the data is rolled up to country level, we have lost its granularity (the level of detail of the data). Also, most counts are below 5K while only Brazil is close to 10K, thus no country stands out.
3a. Equirectangular projection and choropleth color log(Z)
# World choropleth of the per-country record count on a natural-log scale,
# drawn with an equirectangular projection.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # plotly projection names are lower-case; a capitalised name is not a
    # recognised value.
    geo = list(projection = list(type = "equirectangular"))
  )
3b Conic Equal Area projection
# World choropleth of the per-country record count on a natural-log scale,
# drawn with a conic equal-area projection.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # The projection name must be spelled exactly as plotly lists it (all
    # lower-case); "Conic equal area" is not a recognised value, so the
    # requested projection was never applied.
    geo = list(projection = list(type = "conic equal area"))
  )
Analysis: Putting the scale in log brings more information into the chart compared to non-log values. However, there is not much difference between the two types of projection when it comes to analysis.
4. Resolve and Map for Brazil with discrete cuts
# Bin longitude (X) and latitude (Y) into 100 equal-width intervals each;
# labels = FALSE stores the integer bin index instead of an interval label.
mosquito_data$X_cut <- cut_interval(mosquito_data$X, n = 100, labels = FALSE)
mosquito_data$Y_cut <- cut_interval(mosquito_data$Y, n = 100, labels = FALSE)

# 2013 Brazilian records: one marker per occupied grid cell and species,
# placed at the mean coordinate of the cell and coloured by record count.
mosquito_data %>%
  filter(YEAR == "2013" & COUNTRY == "Brazil") %>%
  group_by(COUNTRY, VECTOR, X_cut, Y_cut) %>%
  summarise(X_mean = mean(X), Y_mean = mean(Y), count = n()) %>%
  ungroup() %>%
  plot_mapbox(
    lon = ~X_mean, lat = ~Y_mean,
    # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
    mode = "markers",
    # `hoverinfo` only accepts plotly flags, so the count is shown via `text`.
    text = ~paste0(VECTOR, ": ", count, " records"), hoverinfo = "text",
    split = ~VECTOR, color = ~count,
    width = 800, height = 800
  ) %>%
  # No add_trace(z = ~count) here: scattermapbox traces have no `z` attribute
  # and the extra call only duplicated the markers. The count is already
  # encoded through `color`.
  layout(
    title = "Mosquito Population vs. Brazil",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
## Warning: 'scattermapbox' objects don't have these attributes: 'z'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'lon', 'lat', 'mode', 'text', 'hovertext', 'line', 'connectgaps', 'marker', 'fill', 'fillcolor', 'textfont', 'textposition', 'selected', 'unselected', 'subplot', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'lonsrc', 'latsrc', 'textsrc', 'hovertextsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Analysis: Taking the mean of the coordinates definitely helped in this case, although it reduced the level of detail. In terms of mosquito population, we find that “Patos” and “Bauru” have the highest counts of mosquitoes.
Assignment 2 Visualization of Income of Swedish Household
2. Violin Plot showing the distribution of wealth
# Violin plot of the X2016 income column per age group, with an embedded box
# plot and a mean line for every group.
income_data %>%
  plot_ly(
    x = ~age_group, y = ~X2016,
    type = "violin", split = ~age_group,
    # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
    # reassigned.
    box = list(visible = TRUE),
    meanline = list(visible = TRUE)
  ) %>%
  layout(
    yaxis = list(title = "income Distribution", zeroline = FALSE),
    xaxis = list(title = "Age Group"),
    title = "Income Distribution vs. Age Group in Sweden 2016"
  )
Analysis: As expected, the bulk of the young earn less than seniors and adults. However, there appear to be two modes here, one at around 200 and one at around 500, the same as for seniors. The wealth spread is highest among seniors.
3. Surface Plot showing the distribution of wealth
# Interpolate Senior income onto a grid spanned by Young (x) and Adult (y)
# income; observations with duplicated (x, y) coordinates are averaged.
smoothed_surface <- interp(
  income_data_recasted$Young,
  income_data_recasted$Adult,
  income_data_recasted$Senior,
  duplicate = "mean"
)

plot_ly(
  x = ~smoothed_surface$x,
  y = ~smoothed_surface$y,
  # interp() returns z indexed as [x, y], while a plotly surface reads matrix
  # rows as y and columns as x, so the matrix is transposed.
  z = ~t(smoothed_surface$z),
  type = "surface"
) %>%
  layout(
    title = "Surface Plot of Income Distribution between Young, Adult and Senior",
    # Axes of a 3-D trace live under `scene`; top-level xaxis/yaxis settings
    # are not applied to a surface plot.
    scene = list(
      xaxis = list(title = "Young Income"),
      yaxis = list(title = "Adult Income", zeroline = FALSE),
      zaxis = list(title = "Senior Income")
    )
  )
Analysis:
4. Choropleth of Income distribution
#plot_ly()%>%add_sf(data=rds, split=~NAME_2, color=~Price, showlegend=F, alpha=1)
Appendix
# Chunk defaults for the report body: hide the code.
knitr::opts_chunk$set(echo = FALSE)
library(data.table)
library(dplyr)
library(reshape2)
library(plotly)
library(ggplot2)
library(akima)
library(sf)
# The Mapbox access token is read from the MAPBOX_TOKEN environment variable.
# A token that is already configured (e.g. in .Renviron) is respected; the
# embedded value is only a fallback.
# NOTE(review): this token is committed in the source. Consider rotating it
# and supplying it through the environment only.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1IjoiYW51Ymhhdi1kaWtzaGl0IiwiYSI6ImNqbWI1dDVjeTAwNG0zd21pcHRoMTE4YWgifQ.r4m2I2AYIvAzCNyUn4tiuw")
}
# From here on chunks echo their code again (overrides the setting above).
knitr::opts_chunk$set(echo = TRUE)
# Occurrence records for the two mosquito species used by the maps below.
mosquito_data <- read.csv("aegypti_albopictus.csv")
# Map every 2004 and 2013 occurrence record, drawing one trace per species.
mosquito_data %>%
  filter(YEAR %in% c("2004", "2013")) %>%
  select(VECTOR, COUNTRY, X, Y) %>%
  plot_mapbox(
    lon = ~X, lat = ~Y,
    # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
    mode = "markers",
    # `hoverinfo` only accepts plotly flags, so the country name is supplied
    # through `text` and selected with the "text" flag.
    text = ~COUNTRY, hoverinfo = "text",
    split = ~VECTOR, width = 800, height = 800
  ) %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
# Build a bubble map of the occurrence records of one species in one year.
#
# data:        occurrence records with YEAR, VECTOR, COUNTRY, X and Y columns.
# year_value:  year to keep (compared against YEAR).
# vector_name: species to keep (compared against VECTOR).
#
# Returns a plotly mapbox object with one marker per distinct location, sized
# by the number of records at that location.
plot_vector_year <- function(data, year_value, vector_name) {
  data %>%
    filter(YEAR == year_value & VECTOR == vector_name) %>%
    group_by(COUNTRY, X, Y) %>%
    summarise(count = n()) %>%
    ungroup() %>%
    plot_mapbox(
      lon = ~X, lat = ~Y,
      # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
      mode = "markers",
      size = ~count,
      # `hoverinfo` only accepts plotly flags, and VECTOR is no longer a
      # column after summarise(); the hover label is passed through `text`.
      text = ~paste0(vector_name, "<br>", COUNTRY, ": ", count, " records"),
      hoverinfo = "text"
    ) %>%
    layout(
      title = paste0(
        vector_name, " Mosquito Population vs. Country in ", year_value
      )
    )
}

p1 <- plot_vector_year(mosquito_data, "2004", "Aedes aegypti")
p2 <- plot_vector_year(mosquito_data, "2013", "Aedes aegypti")
p3 <- plot_vector_year(mosquito_data, "2004", "Aedes albopictus")
p4 <- plot_vector_year(mosquito_data, "2013", "Aedes albopictus")

# Print the four maps in order.
p1
p2
p3
p4
# World choropleth of the number of occurrence records per country.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~count, text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # plotly projection names are lower-case; a capitalised name is not a
    # recognised value.
    geo = list(projection = list(type = "equirectangular"))
  )
# World choropleth of the per-country record count on a natural-log scale,
# drawn with an equirectangular projection.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # plotly projection names are lower-case; a capitalised name is not a
    # recognised value.
    geo = list(projection = list(type = "equirectangular"))
  )
# World choropleth of the per-country record count on a natural-log scale,
# drawn with a conic equal-area projection.
mosquito_data %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  plot_geo(width = 1200, height = 800) %>%
  add_trace(
    z = ~log(count), text = ~COUNTRY, locations = ~COUNTRY_ID,
    color = ~log(count), colors = "Purples"
  ) %>%
  colorbar(title = "Mosquito Population in Log") %>%
  layout(
    title = "Mosquito Population vs. Country",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4),
    # The projection name must be spelled exactly as plotly lists it (all
    # lower-case); "Conic equal area" is not a recognised value, so the
    # requested projection was never applied.
    geo = list(projection = list(type = "conic equal area"))
  )
# Bin longitude (X) and latitude (Y) into 100 equal-width intervals each;
# labels = FALSE stores the integer bin index instead of an interval label.
mosquito_data$X_cut <- cut_interval(mosquito_data$X, n = 100, labels = FALSE)
mosquito_data$Y_cut <- cut_interval(mosquito_data$Y, n = 100, labels = FALSE)

# 2013 Brazilian records: one marker per occupied grid cell and species,
# placed at the mean coordinate of the cell and coloured by record count.
mosquito_data %>%
  filter(YEAR == "2013" & COUNTRY == "Brazil") %>%
  group_by(COUNTRY, VECTOR, X_cut, Y_cut) %>%
  summarise(X_mean = mean(X), Y_mean = mean(Y), count = n()) %>%
  ungroup() %>%
  plot_mapbox(
    lon = ~X_mean, lat = ~Y_mean,
    # plot_mapbox() already sets the trace type; `mode` takes a draw mode.
    mode = "markers",
    # `hoverinfo` only accepts plotly flags, so the count is shown via `text`.
    text = ~paste0(VECTOR, ": ", count, " records"), hoverinfo = "text",
    split = ~VECTOR, color = ~count,
    width = 800, height = 800
  ) %>%
  # No add_trace(z = ~count) here: scattermapbox traces have no `z` attribute
  # and the extra call only duplicated the markers. The count is already
  # encoded through `color`.
  layout(
    title = "Mosquito Population vs. Brazil",
    margin = list(l = 50, r = 50, b = 100, t = 100, pad = 4)
  )
# Load the income table and the sf object stored in the GADM Sweden file.
income_data <- read.csv("Income_Data.csv")
rds <- readRDS("gadm36_SWE_2_sf.rds")

# Wide layout: one row per region and household type, one X2016 column per
# age bracket, with the brackets renamed to short labels.
income_data_recasted <- dcast(
  income_data,
  region + type.of.household ~ age,
  value.var = "X2016"
)
setnames(
  income_data_recasted,
  old = c("18-29 years", "30-49 years", "50-64 years"),
  new = c("Young", "Adult", "Senior")
)

# Long layout: label each row with its age group. Rows that are neither
# "18-29 years" nor "30-49 years" are labelled "Senior".
adult_or_senior <- ifelse(income_data$age == "30-49 years", "Adult", "Senior")
income_data$age_group <- ifelse(
  income_data$age == "18-29 years",
  "Young",
  adult_or_senior
)
# Violin plot of the X2016 income column per age group, with an embedded box
# plot and a mean line for every group.
income_data %>%
  plot_ly(
    x = ~age_group, y = ~X2016,
    type = "violin", split = ~age_group,
    # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
    # reassigned.
    box = list(visible = TRUE),
    meanline = list(visible = TRUE)
  ) %>%
  layout(
    yaxis = list(title = "income Distribution", zeroline = FALSE),
    xaxis = list(title = "Age Group"),
    title = "Income Distribution vs. Age Group in Sweden 2016"
  )
# Interpolate Senior income onto a grid spanned by Young (x) and Adult (y)
# income; observations with duplicated (x, y) coordinates are averaged.
smoothed_surface <- interp(
  income_data_recasted$Young,
  income_data_recasted$Adult,
  income_data_recasted$Senior,
  duplicate = "mean"
)

plot_ly(
  x = ~smoothed_surface$x,
  y = ~smoothed_surface$y,
  # interp() returns z indexed as [x, y], while a plotly surface reads matrix
  # rows as y and columns as x, so the matrix is transposed.
  z = ~t(smoothed_surface$z),
  type = "surface"
) %>%
  layout(
    title = "Surface Plot of Income Distribution between Young, Adult and Senior",
    # Axes of a 3-D trace live under `scene`; top-level xaxis/yaxis settings
    # are not applied to a surface plot.
    scene = list(
      xaxis = list(title = "Young Income"),
      yaxis = list(title = "Adult Income", zeroline = FALSE),
      zaxis = list(title = "Senior Income")
    )
  )
#plot_ly()%>%add_sf(data=rds, split=~NAME_2, color=~Price, showlegend=F, alpha=1)